#############################################################
##                                                         ##
##                    Replication Code for 				   ##
## Survival Analysis: A New Guide for Social Scientists    ##
##						  CHAPTER 5 					   ##
##		by Alejandro Quiroz Flores (University of Essex)   ##
##                                                         ## #############################################################

#############################################################
##			Code run in R 3.5.0 in MacOS 10.15.7		   ##
## Necessary packages listed below are installed at system ##
## 					level (in R framework) 				   ## 
## 		Please install necessary package dependencies 	   ##
#############################################################

############################################################# ##                                                         ## ##                    Load Packages                        ## ##                                                         ## #############################################################

library(foreign)
library(survival)
library(mstate)
library(mvna)
library(lattice)
library(cmprsk)
library(etm)
library(simPH)
library(coxme)
library(texreg)
library(ggplot2)

###################################################
##            Set seed for replication           ##
###################################################

set.seed(7777777)

###################################################
##         Set working directory for data        ##
###################################################

setwd("/Users/alex/Dropbox/Papers Essex/CUP Book/Manuscript/Manuscript_CUP_preamble/Submission/Resubmission")

###################################################
## University NPIs Data from Cevasco et al.(2020)##
###################################################

pone <- read.dta(file="pone.0240786.s009_organizedforR2.dta")
class(pone)
names(pone)
attach(pone)

#Create log of number of Covid cases
ln.cases<-log(rw_stateprev+1)
class(ln.cases)

#Create log of number of students
ln.students<-log(drvef2018fulltimeenrollment)
class(ln.students)

#Join with data
pone<-cbind(pone,ln.cases, ln.students)

############################################################# ##                                                         ## ##                  CHAPTER 5 MULTI-STATE                  ## ##                                                         ## #############################################################

###################################################
##       PREPARE LONG DATA FOR MULTI-STATE       ##
###################################################

#The code below transorms a cross-section of universities into a multiple record dataset for estimation of multi-state models

#First, we must set the transition matrix between states

tmat<-trans.illdeath(names=c("No NPI", "Remote Work","Close Campus"))
tmat

#We also need to select the relevant covariates from the dataset for the analysis. These will be the transition-specific covariates

covs<-c("time_tele", "ln.cases", "ln.students", "governorpartyaffiliation")

#The following code uses the call 'msprep' to create the multiple-record data set

pone_long<-msprep(time=c(NA, "time_tele", "time_ess"),status=c(NA,"tele","ess"),data=pone,trans=tmat,keep=covs)

#Explore the data and make sure that this is the correct format
tail(pone_long,30)
table(pone_long$trans)
events(pone_long)

#Now extend the covariates into transition-specific covariates
pone_long<-expand.covs(pone_long,covs,append=T,longnames=F)

detach(pone)
attach(pone_long)

################### FIGURE 5.3 #####################

#This code create the Aalen-Johansen empirical estimate of transition probabilities

#This is an alternative code for creating the matrix of transitions. This will be useful for the using the call 'etm'

tra <- matrix(ncol=3,nrow=3,FALSE)
tra[1, 2:3] <- TRUE
tra[2, 3] <- TRUE
tra

#ETM requieres that relevant variables have specific names, as well as using strings for the censoring indicator. The information is the same, but variable names are changed in order to use the call 'etm'

#Subset the data
pone_long_na<-subset(pone_long, select=c(id,from,to,Tstart,Tstop,status))

#Change the censoring indicator to the string "cens"
pone_long_na$to<-ifelse(pone_long_na$status==0,"cens",pone_long_na$to)
head(pone_long_na)

#Subset the data again but use the converted "to" variable
pone_long_na<-subset(pone_long_na, select=c(id,from,to,Tstart,Tstop))

#Replace the names of variables
names(pone_long_na)<-c("id", "from", "to", "entry", "exit")
head(pone_long_na)

#The following code produces the Aalen-Johansen empirical estimates of transition probabilities 
etm<-etm(pone_long_na,c("1","2","3"),tra,"cens", s=0)

############### INDIVIDUAL FIGURES ################

plot(etm, tr.choice="1 1",conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> No NPI",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)


plot(etm, tr.choice="1 2",conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> Remote Work",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)


plot(etm, tr.choice="2 3", conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="Remote Work -> Close Campus",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)


plot(etm, tr.choice="1 3", conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> Close Campus",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)


################### JOINT FIGURES #################

#There are multiple cases given by the combinations of states
layout<-layout(matrix(c(1,2,3,4),2,2,byrow=T))
layout.show(layout)

plot(etm, tr.choice="1 1",conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> No NPI",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)

plot(etm, tr.choice="1 2",conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> Remote Work",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)

plot(etm, tr.choice="2 3", conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="Remote Work -> Close Campus",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)

plot(etm, tr.choice="1 3", conf.int=T,lty=1,xlim=c(60,100),col=c("purple4"),lwd=3, main="No NPI -> Close Campus",ci.col="deeppink", ci.lty=3, legend=F, xlab="Days since 31/12/2019", cex.lab=1.25, cex.axis=1.25)


################### TABLE 5.5 #####################

#Always good to inspect the survival object and check correct number of failures
 
#Note that the censoring indicator must now be set as a factor

class(status)
survobj.mst<-Surv(Tstart,Tstop,status)
survcheck(survobj.mst ~1,id= id,data= pone_long)

#This is a multi-state model. Note that the multitstate part is created by the transformation of data. Once this is done, estimation only needs a Cox model with the correct data set up

mstate1<-coxph(Surv(Tstart,Tstop,status)~ln.cases.1 + ln.cases.2 + ln.cases.3+ ln.students.1+ln.students.2+ ln.students.3+ governorpartyaffiliation.1 + governorpartyaffiliation.2 + governorpartyaffiliation.3 + strata(trans),data= pone_long,ties="efron")
summary(mstate1)
mstate1$loglik

################### FIGURE 5.4 ####################

#As noted before, predictions requiere the cases we want predictions for. Again, multi-state models do not allow for time-varying covariates. In this first step the code produces a baseline, and therefore all covariates are set to 0. The only movement in variables is the stratification, which indicates which transitions are relevant. In this case, there are only 3 possible transitions

nyu0<-data.frame(ln.cases = rep(0,3),
				ln.students = rep(0,3),
				governorpartyaffiliation = rep(0,3),
				trans = 1:3)
nyu0
attr(nyu0,"trans")<-tmat
class(nyu0)<-c("msdata","data.frame")
nyu0 <-expand.covs(nyu0,covs[2:4],longnames=F)
nyu0$strata<- 1:3
nyu0

#The following code produces the patient-specific baseline cumulative hazards
msf1.0<-msfit(mstate1,newdata=nyu0,trans=tmat)
summary(msf1.0)

#The following produces a plot
plot(msf1.0,ylab="A(t)",xlab="Days since 31/12/2019",lty=1:3,col=c("purple4","deeppink", "blue"),lwd=3, cex.lab=1.2)

#The following produces a ggplot for the book
msf1.0$Haz
f54cumhaz<-msf1.0$Haz$Haz
f54time<-msf1.0$Haz$time
f54data<-data.frame(f54time,f54cumhaz,Transition=c(rep("NoNPI->RW",29),rep("NoNPI->CC",29),rep("RW->CC",29)))

ggplot(data=f54data,aes(x=f54time,y=f54cumhaz,color=Transition,linetype=Transition))+geom_step(direction="hv",size=1)+theme_bw()+labs(x="Days since 31/12/2019", y="A(t)")+xlim(60,105)+ylim(0,50)+scale_color_manual(values=c("deeppink","purple4","blue2"))+scale_linetype_manual(values=c(2,1,3))+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=14))



################### FIGURE 5.5  ###################

#Use patient-specific transition hazards for patient-specific transition probabilities

#The following code produces the transition probabilities from the Aalen-Johansen estimator. Here predt=0 is the starting time for the prediction: all graphs are for time values after that point in time (ie >.0), while direction="forward" is the direction of the prediction 

#Multistate models are complex and we need to be aware of computation warnings. Results are still consistent with previous results from the Aalen-Johansen empirical estimates of transition probabilities and general evidence

aj0<-probtrans(msf1.0,predt=0,direction="forward", method="greenwood")

#It is important that we specify from which state we want to predict, as there are multiple states. In this case, the code produces predictions from state 1, which is the state of no NPIs

#The following produces a plot 
plot(aj0,from=1, type="single",xlim=c(60,100),xlab="Days since 31/12/2019",lty=1:3,col=c("purple4","deeppink", "blue"),lwd=3,legend.pos=topleft, cex.lab=1.2)
legend(82,.63,c("No NPI", "Remote Work","Close Campus"), cex=1,col=c("purple4","deeppink", "blue"),lwd=3,lty=1:3)

#The following produces a ggplot for the book
summary(aj0)
aj0[[1]]
aj0[[1]]$pstate1
aj0[[1]]$time

f55time<-rep(aj0[[1]]$time,3)
f55p11<-aj0[[1]]$pstate1
f55p12<-aj0[[1]]$pstate2
f55p13<-aj0[[1]]$pstate3
f55allp<-c(f55p11,f55p12,f55p13)
f55data<-data.frame(f55time,f55allp,State=c(rep("No NPI",30),rep("Remote Work",30),rep("Close Campus",30)))

ggplot(data=f55data,aes(x=f55time,y=f55allp,color=State,linetype=State))+geom_step(direction="hv",size=1)+theme_bw()+labs(x="Days since 31/12/2019", y="Probability in State")+xlim(60,105)+ylim(0,1)+scale_color_manual(values=c("blue2","purple4","deeppink"))+scale_linetype_manual(values=c(3,1,2))+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=16))


#END


